import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * Small Spark SQL demo that reads the `students` table from a MySQL database
 * over JDBC and prints three query results to stdout:
 *
 *  1. students whose `age` is greater than 20,
 *  2. all students ordered by `major`, then `enrollment_date` (both ascending),
 *  3. the number of students per `major`.
 */
object DataQuery {

  /**
   * Program entry point. Builds a local Spark session, runs the queries and
   * always stops the session afterwards.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("SparkSQL-DataQuery")
      .master("local[*]")
      .getOrCreate()

    // Required for the $"column" string interpolator used below; without this
    // import `$` is not a member of StringContext and the file does not compile.
    import spark.implicits._

    try {
      // Load the source table over JDBC.
      // NOTE: user/password below are placeholders; replace them (ideally from
      // external configuration) before running against a real database.
      val studentsDF = spark.read
        .format("jdbc")
        .option("url", "jdbc:mysql://localhost:3306/sparksql_practice")
        .option("dbtable", "students")
        .option("user", "your_username")
        .option("password", "your_password")
        .option("driver", "com.mysql.cj.jdbc.Driver")
        .load()

      // Students older than 20.
      val olderThan20 = studentsDF.filter($"age" > 20)
      olderThan20.show()

      // Students sorted by major, then by enrollment date.
      val sortedStudents = studentsDF.orderBy($"major".asc, $"enrollment_date".asc)
      sortedStudents.show()

      // Number of students in each major.
      val majorCounts = studentsDF.groupBy("major").count()
      majorCounts.show()
    } finally {
      // Stop the session even if the JDBC load or one of the queries fails.
      spark.stop()
    }
  }
}
